
#---- Authors: Maria Nordbrandt, Gina Gustavsson, Karen Nielsen Breidahl
#---- The Unifying Magic, EJPR 2025


#This is the code used to produce Figure 1 in the main text and Figure D1 in the appendix.


# If needed, install the relevant packages before loading the relevant packages (install.packages("package")
library(haven)    
library(dplyr)    
library(tidyr)   
library(purrr)   
library(broom)    
library(ggplot2)  
library(patchwork)

#OBS! The dofiles to produce the variables must be run and dataset with the new variables saved before this code works.

# Import data
df <- read_dta("Replication_data.dta")

# Define outcome and treatment variables
outcomes   <- c("marry_n",  "marry_n_w",  "marry_dist_n",  "marry_dist_n_w",
                "dislike_n","dislike_n_w","dislike_dist_n","dislike_dist_n_w",
                "traits_n", "traits_n_w", "traits_dist_n","traits_dist_n_w")
treatments <- c("treat_flag", "treat_cake")




table(df$treat_flag)  # now should be 0/1

# Convert treatment to numeric
# Generate binary attention variable to be used later for Figure 2
df <- df %>% mutate(
  treat_flag = as.numeric(treat_flag),
  treat_cake = as.numeric(treat_cake),
  attention  = factor(attention,
                      levels = c(0,1),
                      labels = c("Attentive sample","Inattentive sample"))
)
#Inspect variables to check that coding is correct
table(df$treat_flag)  
table(df$treat_cake) 
table(df$attention) 

# Fit the two bivariate models for Sweden
mod_flag_sweden <- lm(marry_n ~ treat_flag,
                      data = df %>% filter(!is.na(id_se)))
mod_cake_sweden <- lm(marry_n ~ treat_cake,
                      data = df %>% filter(!is.na(id_se)))

# Look at the summaries and 95% CIs
summary(mod_flag_sweden)
confint(mod_flag_sweden)

summary(mod_cake_sweden)
confint(mod_cake_sweden)


# Build a grid of country × outcome × treatment combinations
grid <- expand_grid(
  country   = c("Sweden", "Denmark"),
  outcome   = outcomes,
  treatment = treatments
)

# Fit each bivariate model per country
# For every combination of country, outcome, and treatment, we:
# Subset the data to include only respondents from that country.
# Estimate a bivariate linear regression of each outcome on the treatment.
# Store all models and extract coefficient tables with confidence intervals.
results <- grid %>%
  mutate(
    fit = pmap(
      list(country, outcome, treatment),
      function(country, outcome, treatment) {
        # subset just as in Stata
        df_sub <- if (country == "Sweden") {
          filter(df, !is.na(id_se))
        } else {
          filter(df, !is.na(id_dk))
        }
        # fit lm(outcome ~ treatment)
        lm(
          formula = as.formula(paste0(outcome, " ~ ", treatment)),
          data    = df_sub
        )
      }
    ),
    tidy = map(fit, ~ tidy(.x, conf.int = TRUE))
  ) %>%
  unnest(tidy) %>%
  # keep only the treatment coefficient row
  filter(term == treatment) %>%
  select(country, outcome, treatment, estimate, std.error, conf.low, conf.high, p.value)

# Check that the output matches the Stata estimates
print(results)
# (they do)



# This function prepares the model results for visualization. 
# For each country, it:
# Filters out the relevant subset of results (either Sweden or Denmark).
# Re-codes the treatment indicators into more readable labels ("Flag" and "Cake").
# Groups the outcomes:
#       - "Soc. Distance" 
#       - "Trait stereo." 
#       - "Party Dislike" 
# 4. Creates sublabels distinguishing whether each outcome is weighted or unweighted, 
# and whether it refers to "spread" or "mean distance" (MDLP).
# Adds a flag indicating whether the effect is statistically significant at the 5% level.
# Converts variables into ordered factors to ensure consistent facet ordering
# across all panels in the final figure.

prep_df <- function(country_code) {
  results %>%
    filter(country == country_code) %>%
    mutate(
      treat = factor(recode(treatment,
                            "treat_flag" = "Flag",
                            "treat_cake" = "Cake"),
                     levels = c("Flag","Cake")),
      Attribute = case_when(
        grepl("^marry",   outcome) ~ "Soc. Distance",
        grepl("^traits",  outcome) ~ "Trait stereo.",
        grepl("^dislike", outcome) ~ "Party Disllike"
      ),
      SubLabel = case_when(
        grepl("_dist_n_w$",  outcome) ~ "MDLP, weighted",
        grepl("_dist_n$",    outcome) ~ "MDLP, unweighted",
        grepl("_n_w$",       outcome) ~ "Spread, weighted",
        grepl("_n$",         outcome) ~ "Spread, unweighted"
      ),
      significant = p.value < 0.05
    ) %>%
    mutate(
      SubLabel = factor(SubLabel, levels = c(
        "Spread, unweighted",
        "Spread, weighted",
        "MDLP, unweighted",
        "MDLP, weighted"
      )),
      Attribute = factor(Attribute, levels = c(
        "Soc. Distance","Trait stereo.","Party Dislike"
      ))
    )
}


# This code defines a unified visual base theme that controls the overall 
# appearance of the plots.

base_theme <- theme_minimal(base_size = 13) +
  theme(
    strip.background   = element_rect(fill = "grey95", color = NA),
    panel.grid.minor   = element_blank(),
    panel.spacing      = unit(0.6, "lines"),
    axis.text.y        = element_text(size = 8)
  )

# This function takes the prepared data for a given country and:
# Builds two panels — one for the flag treatment and one for the cake treatment.
# Within each panel, it plots point estimates with 95% confidence intervals,
# grouped by outcome.
# Applies consistent coloring (orange for significant results, grey otherwise)
# and shared axis range for comparability.

make_panels <- function(df, show_legend, title_text, hide_y) {
  extra_theme <- theme(
    strip.text.x      = element_text(face = "bold", size = 10, hjust = 0),
    plot.title        = element_text(hjust = 0.5, face = "bold", size = 10)
  )
  
  # common layers with thinner error bars and markers
  common_layers <- list(
    geom_vline(xintercept = 0, linetype = "dashed", color = "grey40"),
    geom_errorbarh(aes(xmin = conf.low, xmax = conf.high, color = significant),
                   height = 0, size = 0.8),
    geom_point(aes(color = significant, fill = significant),
               shape  = 21, size = 3),
    scale_color_manual(
      values = c(`TRUE` = "#D55E00", `FALSE` = "grey70"),
      guide  = FALSE
    )
  )
  
  # Build flag panel 
  p_flag <- ggplot(filter(df, treat == "Flag"),
                   aes(x = estimate, y = SubLabel)) +
    common_layers +
    scale_fill_manual(
      values = c(`TRUE` = "#D55E00", `FALSE` = "grey70"),
      guide  = FALSE
    ) +
    geom_label(aes(label = sprintf("%.2f", estimate)),
               nudge_x       = 0.02,
               label.padding = unit(0.05, "lines"),
               size          = 2.5,
               fill          = "white",
               color         = "#333333",
               label.size    = 0.3) +
    scale_y_discrete(limits = rev(levels(df$SubLabel))) +
    scale_x_continuous(limits = c(-0.10, 0.09)) +
    facet_grid(rows = vars(Attribute), cols = vars(treat),
               switch = "y", scales = "free_y", space = "free_y") +
    labs(title = title_text, x = NULL, y = NULL) +
    base_theme + extra_theme +
    theme(
      axis.text.x       = element_text(size = 8),
      axis.ticks.x      = element_line(),
      axis.text.y       = if (hide_y) element_blank() else element_text(size = 8),
      strip.text.y.left = if (hide_y) element_blank() else element_text(face = "bold", size = 6),
      legend.position   = "none"
    )
  
  #Cake  Build cake panel 
  p_cake <- ggplot(filter(df, treat == "Cake"),
                   aes(x = estimate, y = SubLabel)) +
    common_layers +
    scale_fill_manual(
      values = c(`TRUE` = "#D55E00", `FALSE` = "grey70"),
      name   = "Significant at 95%",
      labels = c("No", "Yes"),
      guide  = guide_legend(override.aes = list(size = 3))
    ) +
    geom_label(aes(label = sprintf("%.2f", estimate)),
               nudge_x       = 0.02,
               label.padding = unit(0.05, "lines"),
               size          = 2.5,
               fill          = "white",
               color         = "#333333",
               label.size    = 0.2) +
    scale_y_discrete(limits = rev(levels(df$SubLabel))) +
    scale_x_continuous(limits = c(-0.10, 0.09)) +
    facet_grid(rows = vars(Attribute), cols = vars(treat),
               switch = "y", scales = "free_y", space = "free_y") +
    scale_color_manual(  # re-specify to ensure consistency
      values = c(`TRUE` = "#D55E00", `FALSE` = "grey70"),
      guide  = FALSE,
    ) +
    labs(title = NULL, x = NULL, y = NULL) +
    base_theme + extra_theme +
    theme(
      axis.text.x       = element_text(size = 8),
      axis.ticks.x      = element_line(),
      axis.text.y       = if (hide_y) element_blank() else element_text(size = 8),
      strip.text.y.left = if (hide_y) element_blank() else element_text(face = "bold", size = 6),
      legend.position   = if (show_legend) "bottom" else "none",
      legend.title      = element_text(size = 9),
      legend.text       = element_text(size = 8)
    )
  
  p_flag / p_cake
}

# Build separate Sweden and Denmark panels
p_swe <- prep_df("Sweden") %>%
  make_panels(show_legend = FALSE, title_text = "Sweden", hide_y = FALSE)

p_dnk <- prep_df("Denmark") %>%
  make_panels(show_legend = TRUE, title_text = "Denmark", hide_y = FALSE)

# Final 2 x 2 panel figure
(p_swe | p_dnk) + plot_layout(widths = c(1, 1))




############################################################

# Now prepare data for Figure D1. 
# This code estimates split sample regressions by the respondents’ attention levels. 
# Expand_grid() creates every combination of attention category (from df$attention), 
# outcome variables, and treatment.
# For each combination, pmap() fits a linear model of the outcome on the treatment
# among Swedish respondents (id_se not missing) with that attention level.
# broom::tidy() extracts coefficient estimates and confidence intervals for each model.
# Transmute() adds readable labels for the treatment and for 
# each outcome type (e.g., “Spread, weighted”).
# “significant” flags results with p < 0.05 as in Figure 1.
# Finally, mutate() converts the treatment and outcome labels to ordered factors 
# so plots will display them consistently.

res_sd <- expand_grid(
  attention = levels(df$attention),
  outcome   = c("marry_n","marry_n_w","marry_dist_n","marry_dist_n_w"),
  treatment = c("treat_flag","treat_cake")
) %>%
  mutate(
    fit = purrr::pmap(
      list(attention, outcome, treatment),
      function(attn, outcome, treatment) {
        lm(
          formula = as.formula(paste0(outcome, " ~ ", treatment)),
          data = df %>% filter(!is.na(id_se), attention==attn)
        )
      }
    ),
    tidy = purrr::map(fit, ~ broom::tidy(.x, conf.int=TRUE))
  ) %>%
  tidyr::unnest(tidy) %>%
  filter(term == treatment) %>%
  transmute(
    attention,
    treat       = if_else(treatment=="treat_flag","Flag","Cake"),
    SubLabel    = case_when(
      outcome=="marry_n"        ~ "Spread, unweighted",
      outcome=="marry_n_w"      ~ "Spread, weighted",
      outcome=="marry_dist_n"   ~ "MDLP, unweighted",
      outcome=="marry_dist_n_w" ~ "MDLP, weighted"
    ),
    estimate, conf.low, conf.high,
    significant = p.value < 0.05
  ) %>%
  mutate(
    treat    = factor(treat, levels=c("Flag","Cake")),
    SubLabel = factor(SubLabel, levels=c(
      "Spread, unweighted",
      "Spread, weighted",
      "MDLP, unweighted",
      "MDLP, weighted"
    ))
  )


# Plot Figure 2, again as a 2 x 2 figure, but this time by attention and treatments.

p <- ggplot(res_sd, aes(x = estimate, y = SubLabel)) +
  geom_errorbarh(aes(xmin = conf.low, xmax = conf.high, color = significant),
                 height = 0.1, size = 0.9) +
  geom_label(aes(label = sprintf("%.2f", estimate)),
             nudge_y       = 0.2,     
             label.padding = unit(0.05, "lines"),
             size          = 2.5,
             fill          = "white",
             color         = "#333333",
             label.size    = 0.2) +
  geom_point(aes(color = significant),
             shape = 16, size = 4) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "grey40") +
  scale_color_manual(
    name   = "Significant at 95%",
    values = c(`TRUE`  = "#D55E00", `FALSE` = "grey70"),
    labels = c("Yes","No")
  ) +
  facet_grid(
    rows = vars(treat),
    cols = vars(attention),
    switch = "y", scales = "free_y", space = "free_y"
  ) +
  scale_y_discrete(limits = rev(levels(res_sd$SubLabel))) +
  labs(
    title = "Sweden",
    x     = "",
    y     = NULL
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title        = element_text(face = "bold", hjust = 0.5, size = 12),
    strip.background  = element_rect(fill = "grey95", color = NA),
    strip.text.x      = element_text(face = "bold", size = 10, hjust = 0),
    strip.text.y.left = element_text(face = "bold", size = 9),
    panel.grid.minor  = element_blank(),
    panel.spacing     = unit(0.8, "lines"),
    axis.text.y       = element_text(size = 9),
    axis.text.x       = element_text(size = 8),
    legend.position   = "bottom",
    legend.title      = element_text(size = 9),
    legend.text       = element_text(size = 8)
  )

print(p)


